ChIPseq Heatmaps

Tamas Schauer

2020-09-01

Setup Data

library(HelpersforChIPSeq)
data_dir <- system.file("extdata/", package = "HelpersforChIPSeq")

my_mats <- file.path(data_dir ,list.files(path = data_dir, pattern = "^mat"))

for(i in seq_along(my_mats)){
        
        load(my_mats[i])
}

my_mats <- ls(pattern = "^mat")
my_mats
## [1] "mat.TSS.H2AvChIP_rep1.dmelNorm" "mat.TSS.H2AvChIP_rep1.dvirNorm"
## [3] "mat.TSS.H2AvChIP_rep2.dmelNorm" "mat.TSS.H2AvChIP_rep2.dvirNorm"
# Note: these matrices were binned with a size of 10
# see composite plot tutorial how to generate matrices
my_binning <- 10

# they correspond to TSS +/- 2000 bp
dim(get(my_mats[1]))
## [1] 3496  400
# check whether row names are identical in all matrices
all(sapply(seq_along(my_mats), function(i){(identical(rownames(get(my_mats[length(my_mats)])), rownames(get(my_mats[i]))))}))
## [1] TRUE
# example rows and columns
get(my_mats[1])[1:5,201:210] 
##                 4    14    24    34    44    54    64    74    84    94
## FBgn0000018 1.552 1.554 1.531 1.817 2.293 2.293 2.379 2.426 2.499 2.424
## FBgn0000052 0.847 0.821 0.940 1.040 1.030 1.091 1.394 1.604 1.880 1.915
## FBgn0000053 1.584 1.474 1.504 1.657 1.776 1.696 1.617 1.356 1.401 1.621
## FBgn0000055 0.730 0.714 0.700 0.729 0.762 0.746 0.848 0.885 0.870 0.870
## FBgn0000056 0.730 0.714 0.700 0.729 0.762 0.746 0.848 0.885 0.870 0.870
library(matrixStats)

# order by row average signal
my_order <- order(rowMeans(averageMats(my_mats)), decreasing = T)

orderMats(my_mats = my_mats,
          my_order = my_order)

# example rows and columns
get(my_mats[1])[1:5,201:210]
##                 4    14    24    34    44    54    64    74    84    94
## FBgn0051718 1.795 1.695 1.739 1.736 1.848 1.843 1.888 1.993 2.050 2.190
## FBgn0032240 0.985 1.106 1.157 1.228 1.451 1.571 1.669 1.904 1.990 2.091
## FBgn0067622 0.985 1.106 1.157 1.228 1.451 1.571 1.669 1.904 1.990 2.091
## FBgn0031398 1.193 1.123 1.168 2.152 2.265 2.252 2.507 3.017 3.116 3.070
## FBgn0032447 1.766 1.632 1.648 1.754 2.176 2.230 2.590 2.682 2.831 2.899
# filter out TSSs with "flat" signal 
# row standard deviation is higher than a threshold
my_filter <- rowSds(averageMats(my_mats)) > 0.25

# order function can be used for filtering as well
orderMats(my_mats = my_mats,
          my_order = my_filter)

dim(get(my_mats[1]))
## [1] 2084  400

Heatmaps

library(RColorBrewer)

# setup names and titles
my_site_name <- "TSS"
my_sample_names <- gsub("\\.","\n",gsub(paste0(".*", my_site_name, "."),"",my_mats))

# plot heatmaps
plotHeatmap(my_sample_mats = my_mats, 
            my_sample_names = my_sample_names,
            my_site_name = my_site_name,
            my_binning = my_binning)

# change contrast
plotHeatmap(my_sample_mats = my_mats, 
            my_sample_names = my_sample_names,
            my_site_name = my_site_name,
            my_binning = my_binning, 
            min_value = 2,
            max_value = 4)

# change color 
plotHeatmap(my_sample_mats = my_mats, 
            my_sample_names = my_sample_names,
            my_site_name = my_site_name,
            my_binning = my_binning,
            my_colors = colorRampPalette(c("white","orange","red","black"))(100))

# add some smoothing (odd number)
# Note: smoothing is on top of the binning (whcih was done before this session)
plotHeatmap(my_sample_mats = my_mats, 
            my_sample_names = my_sample_names,
            my_site_name = my_site_name,
            my_binning = my_binning, 
            my_colors = colorRampPalette(c("white","orange","red","black"))(100), 
            smoother = 11)

### re-order matrices based on average signal 500 bp downstream of the center
my_order <- order(rowMeans(averageMats(my_mats)[,201:250]), decreasing = T)

orderMats(my_mats = my_mats,
          my_order = my_order)
# plot re-ordered heatmaps
plotHeatmap(my_sample_mats = my_mats, 
            my_sample_names = my_sample_names,
            my_site_name = my_site_name, 
            my_binning = my_binning, 
            my_colors = colorRampPalette(c("white","orange","red","black"))(100), 
            smoother = 11)

### kmeans clustering
k <- 6
km <- kmeans(averageMats(my_mats), centers = k)
my_order <- unlist(sapply(1:k, function(i){which(names(km$cluster) %in% names(km$cluster)[km$cluster == i])}))

orderMats(my_mats = my_mats,
          my_order = my_order)
# plot clustered heatmaps
plotHeatmap(my_sample_mats = my_mats, 
            my_sample_names = my_sample_names,
            my_site_name = my_site_name, 
            my_binning = my_binning, 
            my_colors = colorRampPalette(c("white","orange","red","black"))(100), 
            smoother = 11)

# save heatmap as png
png("heatmaps.TSS.png", height = 7, width = 7, units = "in", res = 200)

plotHeatmap(my_sample_mats = my_mats, 
            my_sample_names = my_sample_names,
            my_site_name = my_site_name, 
            my_binning = my_binning, 
            my_colors = colorRampPalette(c("white","orange","red","black"))(100), 
            smoother = 11)

dev.off()